library(datasets)
library(Hmisc)
Attaching package: ‘Hmisc’
The following object is masked from ‘package:e1071’:
impute
The following objects are masked from ‘package:base’:
format.pval, units
library(grid)
library(ggplot2)
library(moments)
Attaching package: ‘moments’
The following objects are masked from ‘package:e1071’:
kurtosis, moment, skewness
library(e1071)
# Load the stock data CSV and keep only the rows for the GOOGL ticker.
df <- read.csv("stock_details_5_years.csv")
# `%in%` never yields NA, so records with a missing Company are dropped
# instead of coming back as all-NA rows (which `==` inside `[` would produce).
df_googl <- df[df$Company %in% "GOOGL", ]
# Display the filtered data frame.
df_googl
NA
NA
# Plot the GOOGL opening prices in row order.
ts.plot(df_googl[["Open"]], xlab = "Time", ylab = "Googl Opening Prices")

# Row-to-row changes in the opening price (lag-1 first differences).
diff_open <- diff(df_googl[["Open"]])
mean_diff <- mean(diff_open, na.rm = TRUE)

# Kernel density estimate of the differences; the dashed red line marks
# their mean.
plot(
  density(diff_open, na.rm = TRUE),
  main = "Density of First Differences in Open Price",
  xlab = "First Differences",
  ylab = "Density"
)
abline(v = mean_diff, col = "red", lwd = 2, lty = 2)
NA
NA
# Summary statistics of the first differences of the GOOGL opening price.

# Compute first differences
diff_open <- diff(df_googl$Open, lag = 1, differences = 1)

# Calculate statistics: location, spread and range
mean_diff <- mean(diff_open, na.rm = TRUE)
sd_diff <- sd(diff_open, na.rm = TRUE)
min_diff <- min(diff_open, na.rm = TRUE)
max_diff <- max(diff_open, na.rm = TRUE)

# Both moments and e1071 export skewness() and kurtosis(), and this script
# attaches both, so an unqualified call runs whichever package was attached
# last. e1071::kurtosis() reports excess kurtosis (kurtosis - 3) while
# moments::kurtosis() does not, so the namespace is pinned to keep the
# result independent of attach order.
skewness_diff <- moments::skewness(diff_open, na.rm = TRUE)
kurtosis_diff <- moments::kurtosis(diff_open, na.rm = TRUE)

# Quartiles (25%, 50%, 75%)
quantiles_diff <- quantile(
  diff_open,
  probs = c(0.25, 0.5, 0.75),
  na.rm = TRUE
)

# Print results
list(
  Mean = mean_diff,
  Std_Dev = sd_diff,
  Min = min_diff,
  Max = max_diff,
  Skewness = skewness_diff,
  Kurtosis = kurtosis_diff,
  Quantiles = quantiles_diff
)
$Mean
[1] 0.06634328
$Std_Dev
[1] 1.916098
$Min
[1] -9.669998
$Max
[1] 13.6555
$Skewness
[1] 0.1237204
$Kurtosis
[1] 4.495004
$Quantiles
25% 50% 75%
-0.8550034 0.1090012 1.0299988
Around day 800 the behaviour of the series changes: from that point on, the data are more spread out than at the beginning.
library(lubridate)
# Parse the Date column into date-times for use on the x axis.
df_googl$Date2 <- ymd_hms(df_googl$Date) # Convert to POSIXct

# One vertical arrow per row, from (Date2, Open) to (Date2, Open + Open_diff),
# overlaid with 2D density contours coloured by contour level.
# NOTE(review): Open_diff is not defined in the visible part of this script;
# it has to exist (as a df_googl column, or as a vector with one value per
# row) before this plot is built -- confirm.
ggplot(data = df_googl, aes(x = Date2, y = Open)) +
  geom_segment(
    aes(xend = Date2, yend = Open + Open_diff),
    arrow = arrow(length = unit(.2, "cm"))
  ) +
  # after_stat(level) replaces the deprecated `..level..` notation.
  stat_density_2d(aes(colour = after_stat(level))) +
  labs(
    title = "GOOGL Opening Stock - 2D Vector Density Plot",
    x = "Date",
    y = "Opening Price",
    colour = "Density Level"
  )
## TEST DATA
# Pad Open_diff with a single trailing 0 and report the padded length.
# NOTE(review): presumably Open_diff holds the first differences of Open and
# the 0 stands in for the last row, which has no following value -- confirm
# that 0 (rather than NA) is the intended placeholder.
Open_diff2 <- c(Open_diff, 0)
length(Open_diff2)
[1] 1258
# Fit a simple linear regression of the padded differences on the opening
# price, then show the coefficient table and fit statistics.
openLM <- lm(formula = Open_diff2 ~ Open, data = df_googl)
summary(openLM)
Call:
lm(formula = Open_diff2 ~ Open, data = df_googl)
Residuals:
Min 1Q Median 3Q Max
-9.6491 -0.9218 0.0243 0.9474 13.6759
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.271651 0.183060 1.484 0.138
Open -0.002123 0.001808 -1.174 0.241
Residual standard error: 1.915 on 1256 degrees of freedom
Multiple R-squared: 0.001096, Adjusted R-squared: 0.0003009
F-statistic: 1.378 on 1 and 1256 DF, p-value: 0.2406